######################################################
# The Representational Effects of Communal Property: #
#        Evidence from Peru's Indigenous Groups      #
#                Christopher L. Carter               #
######################################################


#########################################################
#set working directory
setwd("C:/Users/chris/Dropbox/Dissertation/related_papers/collective_action/Replication_files")

#Read in user-defined functions
#source("packages.R")
source("rep_effects_functions.R")


###########################################
##               Figure 3                ##
##             CENAGRO 2012              ##
##  http://inei.inei.gob.pe/microdatos/  ##
###########################################

comm_cens <- read.csv("community_census_final.csv",
                      stringsAsFactors = FALSE,
                      encoding = "UTF-8")

#Panel A: Market integration

bins_m <- stats.bin(comm_cens$year_rec, 
                    comm_cens$none, 
                    breaks = seq(1930, 2009, by = 1))


plot(2009-bins_m$centers, 1-bins_m$stats[c("mean"),], 
     cex = 0.85, pch = 19, 
     xlab = 
       "Years since title (Year 0 = 2009)", 
     ylab = 
       "Likelihood of engaging in\n market activities (2012)", 
     ylim = c(0,1), cex.lab = 1.25, cex.axis = 1.25)


#Panel B: Ayni

breaks <- seq(1930, 2009, by = 1)

bins <- stats.bin(comm_cens$year_rec, 
                  comm_cens$ayni, 
                  breaks = breaks)


plot(2009-bins$centers, bins$stats[c("mean"),], cex = 0.85, 
     pch = 19, xlab = "Years since title (Year 0 = 2009)", 
     ylab="Likelihood of preserving\n reciprocity 
     institutions (2012)", ylim = c(0,1), cex.lab = 1.25, 
     cex.axis = 1.25)

###########################################
##               Figure 4                ##
##             CENAGRO 2012              ##
##  http://inei.inei.gob.pe/microdatos/ ##
###########################################

##Create bins for interactive measure (comm_cens$none + comm_cens$ayni)

bins <- stats.bin(comm_cens$year_rec, 
                  comm_cens$int, 
                  breaks = breaks)

par(mar=c(5,6,4,1)+.1)
plot(2009 - bins$centers, bins$stats[c("mean"),], 
     cex = 0.85, pch = 19, xlab = "Years since title 
     (Year 0 = 2009)", ylab = "Market-Ayni index (2012)", 
     ylim = c(0, 2), cex.lab = 1.25, cex.axis = 1.25)


###########################################
##                Table 1                ##
##             CENAGRO 2012              ##
##  http://inei.inei.gob.pe/microdatos/  ##
###########################################

#FE model with district FEs and covariates

mod_fes <- felm(int ~ years_since_title + quechua + 
                  aymara + col_title|
                  dist|0|0, comm_cens)

stargazer(mod_fes)


###############################
##         Table 2           ##
###############################

##Test for covariate imbalance between treatment and control
data <- read.csv("data_communities_campesinas.csv", 
                 encoding = "iso-8859-1")

#Code data for whether the partner is a community member 

data$campesino <- NA
data$campesino[as.character(data$exp_opt) == 
                 "Es Campesino"] <- 1
data$campesino[as.character(data$exp_opt) == 
                 "Es Cusqueno"] <- 0


##Generate outcome 1: Amount given by player one##
data$out1 <- as.numeric(as.character(data$player1))

##Generate outcome 2: Dummy for giving something vs. nothing
data$out2 <- NA
data$out2[data$out1>0] <- 1
data$out2[data$out1==0] <- 0


##Code compliance

data$compliers1 <- NA

data$compliers1[as.character(data$check2) == "Si"] <- 1

data$compliers1[as.character(data$check2) == 
                  "No sabe / No recuerda" | 
                  as.character(data$check2) == 
                  "No"] <- 0

##Linear models of trust game
m1 <- felm(data$out1~data$campesino)

m2 <- felm(data$out2~data$campesino)

m3 <- felm(out1~1|0|(campesino~compliers1)|0, data)

m4 <- felm(out2~1|0|(campesino~compliers1)|0, data)

stargazer(m1, m3, m2, m4,
          title = 
            "Effect of Community Membership on Reciprocity", 
          dep.var.labels = c("Absolute amount of gift", 
                           "Binary measure of giving"),
          covariate.labels = 
            c("Partner is indigenous community member"), 
          omit.stat = c("LL", "rsq", "adj.rsq", "f"), 
          df = FALSE, no.space = TRUE)


###############################
##         Figure 5          ##
###############################

#Code presence of ayni

data$ayni <- 0 
data$ayni[trimws(as.character(data$P05801)) == "AYNI"|
            trimws(as.character(data$P05802)) == "AYNI"|
            trimws(as.character(data$P05803)) == "AYNI"] <- 1

plot_iv_itt_2(data$out1, data$campesino, data$distrito, 
              data$comunidad_full, data$compliers1, 
              cbind(data$ayni), 
              c("Existence of reciprocity institutions 
                (self-reported in 2012 census)"), 
              level = c("Yes", "No"), "", 
              "Difference in size of gift in Soles
              (2018 experiment)")

###############################
##         Figure 6          ##
###############################

#Code period of communal title

data$title_date <- NA
 
data$title_date[as.numeric(as.character(data$year_rec)) < 
                  1960] <- 0

data$title_date[as.numeric(as.character(data$year_rec)) >= 
                  1960] <- 1

data$title_date[is.na(as.numeric(as.character(data$year_rec))) ==
                  TRUE] <- 2

plot_iv_itt_3(data$out1, data$campesino, data$distrito, 
              data$comunidad_full, data$compliers1, 
              cbind(data$title_date), 
              c("Communal title granted:"), 
              level = c("Before 1960", "After 1960", 
                        "No Title"), "",
              "Difference in size of gift\n (Soles)")

plot_iv_itt_3(data$out2, data$campesino, data$distrito, 
              data$comunidad_full, data$compliers1, 
              cbind(data$title_date), 
              c("Communal title granted:"), 
              level = c("Before 1960", "After 1960", 
                        "No Title"), "",
              "Difference in probability of gift\n (Soles)")

###############################
##         Figure 7          ##
###############################

#Read in candidate CV dataset
cand <- read.csv("df_cands.csv", 
                 stringsAsFactors = FALSE)

#Subset to cases where there is more than 1 candidate

cand <- cand[cand$cands > 1,]

cand$ayni <- as.numeric(cand$ayni)

cand$more_than_one <- NA
cand$more_than_one[cand$com_cand > 1] <- 1
cand$more_than_one[cand$com_cand == 1] <- 0


#Municipalities where ayni is preserved in all communities
cand$all_ayni <- NA
cand$all_ayni[cand$ayni == 1] <- 1
cand$all_ayni[cand$ayni != 1] <- 0

#Panel A

test <- ttest(cand$more_than_one[cand$com_cand>0 & 
                                   cand$ayni > 0],
              cand$all_ayni[cand$com_cand>0 & 
                              cand$ayni > 0])

coef <- c(test$control_mean, test$treat_mean)

se <- c(test$se_control, test$se_treat)

names <- c("Ayni mobilization:")

level <- c("In some", "In all communities")

outs <- cbind.data.frame(factor(rep(names, 
                                    each = 2), 
                                levels = unique(names)), 
                         factor(level, levels = unique(level)),
                         as.vector(coef), as.vector(se))

xlab <- "Ayni use:"

ylab <- "Probability that more than one\n community-member candidate runs"

names(outs) <- c("Category", "bw", "point_est", "se")

g <- ggplot(outs, aes(x=bw, y=point_est, group = bw)) + 
  geom_errorbar(aes(ymin=point_est - (1.96 * se), 
                    ymax=point_est + (1.96*se)), 
                width=0, lwd = 0.5) +
  geom_errorbar(aes(ymin=point_est - (1.645 * se), 
                    ymax=point_est + (1.645*se)), 
                width=0, lwd = 1.25) +
  geom_point(aes(y = point_est), cex = 2) +
  labs(x = "", y = ylab) +
  theme_bw()  + 
  facet_wrap(~Category, strip.position = "bottom", scales = "free_x")+
  theme(axis.text=element_text(size=18),
        axis.title=element_text(size=18,face="bold"),
        strip.text.x = element_text(size=18))

print(g)

#Panel B
test <- ttest(cand$com_cand[cand$com_cand>0 & 
                              cand$ayni > 0], 
              cand$all_ayni[ cand$com_cand>0 & 
                               cand$ayni > 0])

coef <- c(test$control_mean, test$treat_mean)

se <- c(test$se_control, test$se_treat)

names <- c("Ayni mobilization:")
level <- c("In some communities", "In all communities")
outs <- cbind.data.frame(factor(rep(names, each = 2), 
                                levels = unique(names)), 
                         factor(level, levels = unique(level)), 
                         as.vector(coef), as.vector(se))

xlab <- "Ayni mobilization:"

ylab <- "Number of community-\n member candidates"

names(outs) <- c("Category", "bw", "point_est", "se")


g1 <- ggplot(outs, aes(x=bw, y=point_est, group = bw)) + 
  geom_errorbar(aes(ymin=point_est - (1.96 * se), 
                    ymax=point_est + 
                      (1.96*se)), width=0, lwd = 0.5) +
  geom_errorbar(aes(ymin=point_est - (1.645 * se), 
                    ymax=point_est + 
                      (1.645*se)), width=0, lwd = 1.25) +
  geom_point(aes(y = point_est), cex = 2) +
  labs(x = "", y = ylab) +
  theme_bw() + 
  facet_wrap(~Category, strip.position = "bottom", 
             scales = "free_x") +
  theme(axis.text=element_text(size=18),
        axis.title=element_text(size=18,face="bold"),
        strip.text.x = element_text(size=18))

print(g1)

###############################
##         Figure 8          ##
###############################

#Read in conjoint dataset

conj <- read.csv("conjoint.csv")

######################
## Ratings Outcome ##
######################

##Full model with all attributes
m1 <- lm(conj$ratings ~ conj$tu_comunidad + 
           conj$una_comunidad +
           conj$male + conj$water + conj$ed + 
           conj$pol_party)


output1 <- coeftest(m1, vcov = vcovHC(m1, type = "HC1"))
output1

##Model with only community
m2 <- lm(conj$ratings ~ conj$tu_comunidad + 
           conj$una_comunidad)

output2 <- coeftest(m2, vcov = vcovHC(m1, type = "HC1"))
output2

##Model with all attributes and robust, clustered SEs

##Degree of freedom adjustment
G <- length(unique(conj$id))
N <- length(conj$id)
dfa <- (G/(G-1)) * (N-1)/m1$df.residual

#Cluster with df adj
cluster_c_vcov <- dfa * vcovHC(m1, type="HC", 
                               cluster="group", adjust=T)

##Get cluster-adjusted robust estimates
output3 <- coeftest(m1, vcov=cluster_c_vcov)
output3

##Model with only community vars and robust, clustered SEs

##Degree of freedom adjustment
G <- length(unique(conj$id))
N <- length(conj$id)
dfa <- (G/(G-1)) * (N-1)/m2$df.residual

#Cluster with df adj
cluster_c_vcov <- dfa * vcovHC(m2, type = "HC", 
                               cluster = "group", 
                               adjust = T)

##Get cluster-adjusted robust estimates
output4 <- coeftest(m2, vcov = cluster_c_vcov)
output4

######################
## Discrete Outcome ##
######################

##Full model with all attributes
##
m3 <- lm(conj$outcome ~ conj$tu_comunidad + 
           conj$una_comunidad + conj$male + 
           conj$water + conj$ed + conj$pol_party)

output5 <- coeftest(m3, vcov = vcovHC(m3, type = "HC1"))
output5

##Model with only community
m4 <- lm(conj$outcome ~ conj$tu_comunidad + 
           conj$una_comunidad)

output6 <- coeftest(m4, vcov = vcovHC(m4, type = "HC1"))

##Model with all attributes and robust, clustered SEs

##Degree of freedom adjustment
G <- length(unique(conj$id))

N <- length(conj$id)

dfa <- (G/(G-1)) * (N-1)/m3$df.residual

#Cluster with df adj
cluster_c_vcov <- dfa * vcovHC(m3, type = "HC", 
                               cluster = "group", 
                               adjust = T)

##Get cluster-adjusted robust estimates
output7 <- coeftest(m3, vcov=cluster_c_vcov)
output7

##Model with only community vars and robust, clustered SEs

##Degree of freedom adjustment
G <- length(unique(conj$id))
N <- length(conj$id)
dfa <- (G/(G-1)) * (N-1)/m4$df.residual

#Cluster with df adj
cluster_c_vcov <- dfa * vcovHC(m4, type = "HC", 
                               cluster = "group", 
                               adjust = T)

##Get cluster-adjusted robust estimates
output8 <- coeftest(m4, vcov = cluster_c_vcov)
output8

#######################################
##Begin plot by plotting coefficients##
#######################################

##Rating outcome##

##Save output from regression 3

o <- output3

names <- c("District capital", "Your community", 
           "Another community", "Female", 
           "Male", "Roads", "Water", "Education", 
           "Regional Movement", "Political Party")

##Point estimate

point_est <- c(0, o[2,1], o[3,1], 0, o[4,1], 0, o[5,1], o[6,1], 
               0, o[7,1])

##Standard error
se <- c(0, o[2,2], o[3,2], 0, o[4,2], 0, o[5,2], o[6,2], 
        0, o[7,2])

newdf <- cbind.data.frame(names, point_est, se)

##Get confidence intervals
newdf$ci_u <- newdf$point_est+ (se*1.96)
newdf$ci_l <- newdf$point_est- (se*1.96)

newdf$names1 <- factor(newdf$names, 
                       levels = c("District capital", 
                                "Your community", 
                                "Another community",
                                "Female", "Male", 
                                "Roads", "Water",
                                "Education", 
                                "Regional Movement",
                                "Political Party"))

p <- ggplot(newdf, aes(x = newdf$names1)) + 
  
  geom_point(aes(y = as.numeric(newdf$point_est)), size=3) + 
  
  scale_y_continuous(limits = c(-1,1)) + 
  
  geom_errorbar(aes(ymin=ci_l, ymax=ci_u), width = 0.1) + 
  
  labs(title="", x = "Candidate attributes", 
       y = "Ranking of candidate\n 
       (with 95% confidence intervals)") + 
  
  theme_bw() + geom_blank() + 
  
  theme(text = element_text(size=25), panel.grid.major.x =
          element_blank(), panel.grid.minor.y = 
          element_blank()) +
  
  theme(legend.title=element_blank()) + 
  
  geom_hline(yintercept=0, lty=2, col="slateblue") + 
  
  coord_flip()

print(p)

dev.off()
##Discrete outcome##

o2 <- output7

names <- c("District capital", "Your community", "Another community",
           "Female", "Male", "Roads", "Water", "Education", 
           "Regional Movement", "Political Party")

##Point estimate
point_est <- c(0, o2[2,1], o2[3,1], 0, o2[4,1], 
               0, o2[5,1], 
               o2[6,1], 0, o2[7,1])

##Standard error
se <- c(0, o2[2,2], o2[3,2], 0, o2[4,2], 
        0, o2[5,2], o2[6,2],
        0, o2[7,2])

newdf <- cbind.data.frame(names, point_est, se)

##Get confidence intervals
newdf$ci_u_d <- newdf$point_est+ (se*1.96)

newdf$ci_l_d <- newdf$point_est- (se*1.96)

##Recode names
newdf$names1 <- factor(newdf$names, levels = 
                         c("District capital", 
                           "Your community", 
                           "Another community", 
                           "Female", "Male", 
                           "Roads", "Water", 
                           "Education", 
                           "Regional Movement", 
                           "Political Party"))

p1 = ggplot(newdf, aes(x = newdf$names1)) + 
  
  geom_point(aes(y = as.numeric(newdf$point_est)), size=3) + 
  
  scale_y_continuous(limits = c(-0.3,0.3)) +
  
  geom_errorbar(aes(ymin = ci_l_d, ymax = ci_u_d), 
                width = 0.1) + 
  
  labs(title = "", x = "Candidate attributes", 
       y = "Probability of choosing candidate\n 
       (with 95% confidence intervals)") + 
  
  theme_bw() + geom_blank() + 
  
  theme(text = element_text(size=25), 
        panel.grid.major.x = element_blank(),
        panel.grid.minor.y = element_blank()) + 
  
  theme(legend.title=element_blank()) + 
  
  geom_hline(yintercept=0, lty=2, col="slateblue") + 
  
  coord_flip()


print(p1)


###############################
##         Table 3           ##
###############################

#Calculate treatment effect of lab-in-the field by municipality to obtain a behavioral measure of the persistence of ayni

dist_names<- unique(conj$dist)

out <- matrix(NA, length(dist_names), 3)

for(i in 1:length(dist_names)){
  
  mod <- coef(summary(felm(conj$gift ~ conj$campesino |
                             0 | 0 | 0, subset = 
                             as.character(conj$dist) ==
                             as.character(dist_names)[i])))
  
  out[i,] <- c(as.character(dist_names[i]), mod[2,1], 
               mod[2,3])
  
}


out <- as.data.frame(out)

names(out) <- c("name", "effect", "t")

conj$name <- as.character(conj$dist)

#Merge conjoint data with aggregated data from lab-in-the-field
data2 <- merge(conj, out, by = "name")


#Code dummy variable for whether there is evidence of reciprocity in municipal-level results for lab-in-the-field (p<0.1)
data2$lab_in_field <- 0

data2$lab_in_field[as.numeric(as.character(data2$t)) > 
                     1.6] <- 1

#Create variable for whether there is evidence of behavioral and/or self-reported reciprocity

data2$ev <- NA

#No evidence according to either measure
data2$ev[data2$ayni == 0 & data2$lab_in_field == 0] <- 
  "None"

#Evidence of only self-reported ayni
data2$ev[data2$ayni == 1 & data2$lab_in_field == 0] <- 
  "Self-reported"

#Evidence of only behavioral ayni
data2$ev[data2$ayni == 0 & data2$lab_in_field == 1] <-
  "Behavioral"

#Behavioral and self-reported evidence of ayni
data2$ev[data2$ayni ==1 & data2$lab_in_field == 1] <- 
  "Both"


mod_none <- felm(outcome~tu_comunidad+una_comunidad | 
                  0 | 0 | id, data2, 
                 subset = data2$ev == "None")

mod_self <- felm(outcome~tu_comunidad+una_comunidad |
                  0 | 0 | id, data2, 
                 subset = data2$ev == "Self-reported")

mod_beh <- felm(outcome~tu_comunidad+una_comunidad |
                  0 | 0 | id, data2, 
                subset = data2$ev == "Behavioral")

mod_both <- felm(outcome~tu_comunidad+una_comunidad |
                  0 | 0 | id, data2, 
                 subset = data2$ev == "Both")


stargazer(mod_none, mod_self, mod_beh, mod_both)



######################################
##   Appendix Tables and Figures    ##
######################################

######################################
##          Table A1                ##
######################################

summary(felm(market~ayni, comm_cens))

###############################
##         Table A2          ##
###############################

##Test for covariate imbalance between treatment and control


#Prepare list of covariates
covs <- c("head", "less_than_100", "between_100_600", 
          "morethan_600", "male", "age", "ed_prim", 
          "ed_sec", "lit", "farm", "bus", "lab", 
          "con", "pres", "private", "comm", "prev_govt",
          "distance")

col_nums <- match(covs, names(data))

l <- lapply(col_nums, function(x) mean_diff(data[,x], 
                                            data$campesino))

bal_df <- data.frame(matrix(unlist(l), nrow=length(l), 
                            byrow=T))[,c(1:3, 6, 7)]

rownames(bal_df) <- c("Head of household", 
                      "Income (Less than S./100)",
                      "Income (between S./100 and S./600)", 
                      "Income (More than S./600)", "Male", 
                      "Age", "Education (Primary)", 
                      "Education (Secondary)", "Literate", 
                      "Farmer", "Business/sales", 
                      "Laborer (other)", 
                      "Construction", "Current president", 
                      "Private landholder", 
                      "Resident of communal land", 
                      "Previous municipal official", 
                      "Distance to district capital (< 1 hour)")

names(bal_df) <- names(unlist(l))[c(1:3, 6, 7)]

bal_df
######################################
##          Table A3                ##
######################################

stargazer(m1, m2, m3, m4, 
          se = list(NULL, c(output3[,2], 
                            output4[,2], 
                            output7[,2], 
                            output8[,2])),
          title="Effect of Candidate Attributes on Vote Choice", 
          align=TRUE, dep.var.labels=c("Rating", 
                                       "Discrete Choice"),
          
          covariate.labels = c("Same community", "Different 
          community", "Male", "Chief Policy: Water", 
                               
          "Chief Policy: Education", "Political Party"),
          
          omit.stat=c("LL", "ser", "f"),
          
          no.space=TRUE)


######################################
##       Figures A1 and A2          ##
######################################


plot_iv_cace_3(data$out2, data$campesino, data$distrito, 
               data$comunidad_full, comp = data$compliers1, 
               cbind(data$title_date), 
               c("Communal title granted:"),
               level = c("Before 1960", "After 1960", 
                         "No Title"), "",
               "Probability of gift\n (Soles)")


plot_iv_cace_3(data$out1, data$campesino, data$distrito, 
               data$comunidad_full, data$compliers1, 
               cbind(data$title_date), 
               c("Communal title granted:"),
               level = c("Before 1960", "After 1960", 
                         "No Title"), "",
               "Difference in size of gift\n (Soles)")


######################################
##           Table A4               ##
######################################

data_profiles <- 
  cbind.data.frame(cbind(conj$outcome[conj$profile == "A"],
                         conj$ratings[conj$profile == "A"],
                         conj$id[conj$profile == "A"]),
                   cbind(conj$outcome[conj$profile == "A"], 
                         conj$ratings[conj$profile == "B"],
                         conj$id[conj$profile == "A"]))

names(data_profiles) <- c("discrete_a", "rating_a", "id_a", 
                          "discrete_b", "rating_b", "id_b")

#Check to see if merge connected ids correctly
summary(data_profiles$id_a == data_profiles$id_b)

#Code difference in profile ratings
data_profiles$rating_diff <- data_profiles$rating_a - 
  data_profiles$rating_b

#Code for which profile was rated more favorably---if either

data_profiles$rating_cons <- NA

data_profiles$rating_cons[data_profiles$rating_diff > 0] <- 
  "Rated A more favorably"

data_profiles$rating_cons[data_profiles$rating_diff == 0] <- 
  "No difference"

data_profiles$rating_cons[data_profiles$rating_diff < 0] <- 
  "Rated B more favorably"

#Calculate correspondence between discrete and rating measures
tab <- prop.table(table(data_profiles$discrete_a, data_profiles$rating_cons), 2)

rownames(tab) <- c("Chose B", "Chose A")

print(tab[2:1,c(2,1,3)])

######################################
##           Table A5               ##
######################################

#Code compliers using don't know as "No"

data$compliers2 <- NA

data$compliers2[as.character(data$check2)=="Si"] <- 1

data$compliers2[as.character(data$check2)=="No sabe / No recuerda"] <- 0


m5 <- felm(out1 ~ 1 | 0 | 
             (campesino ~ compliers2) | 0, data)
m6 <- felm(out2 ~ 1 | 0 |
             (campesino ~ compliers2) | 0, data)

stargazer(m5, m6, title = 
            "Effect of Community Membership on Reciprocity", 
          dep.var.labels = c("Absolute amount of gift", 
                           "Binary measure of giving"),
          covariate.labels = c("Partner is an indigenous 
                               community member"), 
          omit.stat = c("LL", "rsq", "adj.rsq", "f"), 
          df = FALSE,
          no.space = TRUE)

######################################
##           Table A6               ##
######################################


mod <- lm(comm_cens$ayni ~ comm_cens$priv)

mod1 <- lm(comm_cens$market ~ comm_cens$priv)

mod2 <- lm(comm_cens$int ~ comm_cens$priv)

stargazer(mod, mod1, mod2)

########################################
## Other analysis referenced in paper ##
########################################

##Effects of community size on reciprocity institutions

summary(felm(size ~ ayni | 0 | 0 | 0, comm_cens))
